#ifndef _GNU_SOURCE
#define _GNU_SOURCE // for strdup
#endif
#include <assert.h>
#include <math.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include "compile.h"
#include "bytecode.h"
#include "locfile.h"
#include "jv_alloc.h"
#include "linker.h"

/*
  The intermediate representation for jq filters is as a sequence of
  struct inst, which form a doubly-linked list via the next and prev
  pointers.

  A "block" represents a sequence of "struct inst", which may be
  empty.

  Blocks are generated by the parser bottom-up, so may have free
  variables (refer to things not defined). See inst.bound_by and
  inst.symbol.
 */
struct inst {
  struct inst* next;
  struct inst* prev;

  opcode op;

  struct {
    uint16_t intval;
    struct inst* target;
    jv constant;
    const struct cfunction* cfunc;
  } imm;

  struct locfile* locfile;
  location source;

  // Binding
  // An instruction requiring binding (for parameters/variables/functions)
  // is in one of three states:
  //   inst->bound_by = NULL  - Unbound free variable
  //   inst->bound_by = inst  - This instruction binds a variable
  //   inst->bound_by = other - Uses variable bound by other instruction
  // Unbound instructions (references to other things that may or may not
  // exist) are created by "gen_foo_unbound", and bindings are created by
  // block_bind(definition, body), which binds all instructions in
  // body which are unbound and refer to "definition" by name.
  struct inst* bound_by;
  char* symbol;
  int any_unbound;
  int referenced;

  int nformals;
  int nactuals;

  block subfn;   // used by CLOSURE_CREATE (body of function)
  block arglist; // used by CLOSURE_CREATE (formals) and CALL_JQ (arguments)

  // This instruction is compiled as part of which function?
  // (only used during block_compile)
  struct bytecode* compiled;

  int bytecode_pos; // position just after this insn
};

static inst* inst_new(opcode op) {
  inst* i = jv_mem_alloc(sizeof(inst));
  i->next = i->prev = 0;
  i->op = op;
  i->bytecode_pos = -1;
  i->bound_by = 0;
  i->symbol = 0;
  i->any_unbound = 0;
  i->referenced = 0;
  i->nformals = -1;
  i->nactuals = -1;
  i->subfn = gen_noop();
  i->arglist = gen_noop();
  i->source = UNKNOWN_LOCATION;
  i->locfile = 0;
  return i;
}

static void inst_free(struct inst* i) {
  jv_mem_free(i->symbol);
  block_free(i->subfn);
  block_free(i->arglist);
  if (i->locfile)
    locfile_free(i->locfile);
  if (opcode_describe(i->op)->flags & OP_HAS_CONSTANT) {
    jv_free(i->imm.constant);
  }
  jv_mem_free(i);
}

static block inst_block(inst* i) {
  block b = {i,i};
  return b;
}

int block_is_single(block b) {
  return b.first && b.first == b.last;
}

static inst* block_take(block* b) {
  if (b->first == 0) return 0;
  inst* i = b->first;
  if (i->next) {
    i->next->prev = 0;
    b->first = i->next;
    i->next = 0;
  } else {
    b->first = 0;
    b->last = 0;
  }
  return i;
}

block gen_location(location loc, struct locfile* l, block b) {
  for (inst* i = b.first; i; i = i->next) {
    if (i->source.start == UNKNOWN_LOCATION.start &&
        i->source.end == UNKNOWN_LOCATION.end) {
      i->source = loc;
      i->locfile = locfile_retain(l);
    }
  }
  return b;
}

block gen_noop() {
  block b = {0,0};
  return b;
}

int block_is_noop(block b) {
  return (b.first == 0 && b.last == 0);
}

block gen_op_simple(opcode op) {
  assert(opcode_describe(op)->length == 1);
  return inst_block(inst_new(op));
}


block gen_const(jv constant) {
  assert(opcode_describe(LOADK)->flags & OP_HAS_CONSTANT);
  inst* i = inst_new(LOADK);
  i->imm.constant = constant;
  return inst_block(i);
}

block gen_const_global(jv constant, const char *name) {
  assert((opcode_describe(STORE_GLOBAL)->flags & (OP_HAS_CONSTANT | OP_HAS_VARIABLE | OP_HAS_BINDING)) ==
         (OP_HAS_CONSTANT | OP_HAS_VARIABLE | OP_HAS_BINDING));
  inst* i = inst_new(STORE_GLOBAL);
  i->imm.constant = constant;
  i->symbol = strdup(name);
  i->any_unbound = 0;
  return inst_block(i);
}

block gen_op_pushk_under(jv constant) {
  assert(opcode_describe(PUSHK_UNDER)->flags & OP_HAS_CONSTANT);
  inst* i = inst_new(PUSHK_UNDER);
  i->imm.constant = constant;
  return inst_block(i);
}

int block_is_const(block b) {
  return (block_is_single(b) && (b.first->op == LOADK || b.first->op == PUSHK_UNDER));
}

int block_is_const_inf(block b) {
  return (block_is_single(b) && b.first->op == LOADK &&
          jv_get_kind(b.first->imm.constant) == JV_KIND_NUMBER &&
          isinf(jv_number_value(b.first->imm.constant)));
}

jv_kind block_const_kind(block b) {
  assert(block_is_const(b));
  return jv_get_kind(b.first->imm.constant);
}

jv block_const(block b) {
  assert(block_is_const(b));
  return jv_copy(b.first->imm.constant);
}

block gen_op_target(opcode op, block target) {
  assert(opcode_describe(op)->flags & OP_HAS_BRANCH);
  assert(target.last);
  inst* i = inst_new(op);
  i->imm.target = target.last;
  return inst_block(i);
}

block gen_op_targetlater(opcode op) {
  assert(opcode_describe(op)->flags & OP_HAS_BRANCH);
  inst* i = inst_new(op);
  i->imm.target = 0;
  return inst_block(i);
}
void inst_set_target(block b, block target) {
  assert(block_is_single(b));
  assert(opcode_describe(b.first->op)->flags & OP_HAS_BRANCH);
  assert(target.last);
  b.first->imm.target = target.last;
}

block gen_op_unbound(opcode op, const char* name) {
  assert(opcode_describe(op)->flags & OP_HAS_BINDING);
  inst* i = inst_new(op);
  i->symbol = strdup(name);
  i->any_unbound = 1;
  return inst_block(i);
}

block gen_op_var_fresh(opcode op, const char* name) {
  assert(opcode_describe(op)->flags & OP_HAS_VARIABLE);
  block b = gen_op_unbound(op, name);
  b.first->bound_by = b.first;
  return b;
}

block gen_op_bound(opcode op, block binder) {
  assert(block_is_single(binder));
  block b = gen_op_unbound(op, binder.first->symbol);
  b.first->bound_by = binder.first;
  b.first->any_unbound = 0;
  return b;
}

block gen_dictpair(block k, block v) {
  return BLOCK(gen_subexp(k), gen_subexp(v), gen_op_simple(INSERT));
}


static void inst_join(inst* a, inst* b) {
  assert(a && b);
  assert(!a->next);
  assert(!b->prev);
  a->next = b;
  b->prev = a;
}

void block_append(block* b, block b2) {
  if (b2.first) {
    if (b->last) {
      inst_join(b->last, b2.first);
    } else {
      b->first = b2.first;
    }
    b->last = b2.last;
  }
}

block block_join(block a, block b) {
  block c = a;
  block_append(&c, b);
  return c;
}

int block_has_only_binders_and_imports(block binders, int bindflags) {
  bindflags |= OP_HAS_BINDING;
  for (inst* curr = binders.first; curr; curr = curr->next) {
    if ((opcode_describe(curr->op)->flags & bindflags) != bindflags && curr->op != DEPS && curr->op != MODULEMETA) {
      return 0;
    }
  }
  return 1;
}

static int inst_is_binder(inst *i, int bindflags) {
  return !((opcode_describe(i->op)->flags & bindflags) != bindflags && i->op != MODULEMETA);
}

int block_has_only_binders(block binders, int bindflags) {
  bindflags |= OP_HAS_BINDING;
  bindflags &= ~OP_BIND_WILDCARD;
  for (inst* curr = binders.first; curr; curr = curr->next) {
    if ((opcode_describe(curr->op)->flags & bindflags) != bindflags && curr->op != MODULEMETA) {
      return 0;
    }
  }
  return 1;
}

// Count a call site's actual params
static int block_count_actuals(block b) {
  int args = 0;
  for (inst* i = b.first; i; i = i->next) {
    switch (i->op) {
    default: assert(0 && "Unknown function type"); break;
    case CLOSURE_CREATE:
    case CLOSURE_PARAM:
    case CLOSURE_CREATE_C:
      args++;
      break;
    }
  }
  return args;
}

static int block_bind_subblock_inner(int* any_unbound, block binder, block body, int bindflags, int break_distance) {
  assert(block_is_single(binder));
  assert((opcode_describe(binder.first->op)->flags & bindflags) == (bindflags & ~OP_BIND_WILDCARD));
  assert(binder.first->symbol);
  assert(binder.first->bound_by == 0 || binder.first->bound_by == binder.first);
  assert(break_distance >= 0);

  binder.first->bound_by = binder.first;
  int nrefs = 0;
  for (inst* i = body.first; i; i = i->next) {
    if (i->any_unbound == 0)
      continue;

    int flags = opcode_describe(i->op)->flags;
    if ((flags & bindflags) == (bindflags & ~OP_BIND_WILDCARD) && i->bound_by == 0 &&
        (!strcmp(i->symbol, binder.first->symbol) ||
         // Check for break/break2/break3; see parser.y
         ((bindflags & OP_BIND_WILDCARD) && i->symbol[0] == '*' &&
          break_distance <= 3 && (i->symbol[1] == '1' + break_distance) &&
          i->symbol[2] == '\0'))) {
      // bind this instruction
      if (i->nactuals == -1 || i->nactuals == binder.first->nformals) {
        i->bound_by = binder.first;
        nrefs++;
      }
    } else if ((flags & bindflags) == (bindflags & ~OP_BIND_WILDCARD) && i->bound_by != 0 &&
               !strncmp(binder.first->symbol, "*anonlabel", sizeof("*anonlabel") - 1) &&
               !strncmp(i->symbol, "*anonlabel", sizeof("*anonlabel") - 1)) {
      // Increment the break distance required for this binder to match
      // a break whenever we come across a STOREV of *anonlabel...
      break_distance++;
    }

    i->any_unbound = (i->symbol && !i->bound_by);

    // binding recurses into closures
    nrefs += block_bind_subblock_inner(&i->any_unbound, binder, i->subfn, bindflags, break_distance);
    // binding recurses into argument list
    nrefs += block_bind_subblock_inner(&i->any_unbound, binder, i->arglist, bindflags, break_distance);

    if (i->any_unbound)
      *any_unbound = 1;
  }
  return nrefs;
}

static int block_bind_subblock(block binder, block body, int bindflags, int break_distance) {
  int any_unbound;
  return block_bind_subblock_inner(&any_unbound, binder, body, bindflags, break_distance);
}

static int block_bind_each(block binder, block body, int bindflags) {
  assert(block_has_only_binders(binder, bindflags));
  bindflags |= OP_HAS_BINDING;
  int nrefs = 0;
  for (inst* curr = binder.first; curr; curr = curr->next) {
    nrefs += block_bind_subblock(inst_block(curr), body, bindflags, 0);
  }
  return nrefs;
}

static block block_bind(block binder, block body, int bindflags) {
  block_bind_each(binder, body, bindflags);
  return block_join(binder, body);
}

block block_bind_library(block binder, block body, int bindflags, const char *libname) {
  bindflags |= OP_HAS_BINDING;
  int nrefs = 0;
  int matchlen = (libname == NULL) ? 0 : strlen(libname);
  char *matchname = jv_mem_alloc(matchlen+2+1);
  matchname[0] = '\0';
  if (libname != NULL && libname[0] != '\0') {
    strcpy(matchname,libname);
    strcpy(matchname+matchlen, "::");
    matchlen += 2;
  }
  assert(block_has_only_binders(binder, bindflags));
  for (inst *curr = binder.last; curr; curr = curr->prev) {
    int bindflags2 = bindflags;
    char* cname = curr->symbol;
    char* tname = jv_mem_alloc(strlen(curr->symbol)+matchlen+1);
    strcpy(tname, matchname);
    strcpy(tname+matchlen, curr->symbol);

    // Ew
    if ((opcode_describe(curr->op)->flags & (OP_HAS_VARIABLE | OP_HAS_CONSTANT)))
      bindflags2 = OP_HAS_VARIABLE | OP_HAS_BINDING;

    // This mutation is ugly, even if we undo it
    curr->symbol = tname;
    nrefs += block_bind_subblock(inst_block(curr), body, bindflags2, 0);
    curr->symbol = cname;
    free(tname);
  }
  free(matchname);
  return body; // We don't return a join because we don't want those sticking around...
}

static inst* block_take_last(block* b) {
  inst* i = b->last;
  if (i == 0)
    return 0;
  if (i->prev) {
    i->prev->next = i->next;
    b->last = i->prev;
    i->prev = 0;
  } else {
    b->first = 0;
    b->last = 0;
  }
  return i;
}

// Binds a sequence of binders, which *must not* alrady be bound to each other,
// to body, throwing away unreferenced defs
block block_bind_referenced(block binder, block body, int bindflags) {
  assert(block_has_only_binders(binder, bindflags));
  bindflags |= OP_HAS_BINDING;

  inst* curr;
  while ((curr = block_take_last(&binder))) {
    block b = inst_block(curr);
    if (block_bind_subblock(b, body, bindflags, 0) == 0) {
      block_free(b);
    } else {
      body = BLOCK(b, body);
    }
  }
  return body;
}

block block_bind_self(block binder, int bindflags) {
  assert(block_has_only_binders(binder, bindflags));
  bindflags |= OP_HAS_BINDING;
  block body = gen_noop();

  inst* curr;
  while ((curr = block_take_last(&binder))) {
    block b = inst_block(curr);
    block_bind_subblock(b, body, bindflags, 0);
    body = BLOCK(b, body);
  }
  return body;
}

static void block_mark_referenced(block body) {
  int saw_top = 0;
  for (inst* i = body.last; i; i = i->prev) {
    if (saw_top && i->bound_by == i && !i->referenced)
      continue;
    if (i->op == TOP) {
      saw_top = 1;
    }
    if (i->bound_by) {
      i->bound_by->referenced = 1;
    }

    block_mark_referenced(i->arglist);
    block_mark_referenced(i->subfn);
  }
}

block block_drop_unreferenced(block body) {
  block_mark_referenced(body);

  block refd = gen_noop();
  inst* curr;
  while ((curr = block_take(&body))) {
    if (curr->bound_by == curr && !curr->referenced) {
      inst_free(curr);
    } else {
      refd = BLOCK(refd, inst_block(curr));
    }
  }
  return refd;
}

jv block_take_imports(block* body) {
  jv imports = jv_array();

  /* Parser should never generate TOP before imports */
  assert(!(body->first && body->first->op == TOP && body->first->next &&
        (body->first->next->op == MODULEMETA || body->first->next->op == DEPS)));

  while (body->first && (body->first->op == MODULEMETA || body->first->op == DEPS)) {
    inst* dep = block_take(body);
    if (dep->op == DEPS) {
      imports = jv_array_append(imports, jv_copy(dep->imm.constant));
    }
    inst_free(dep);
  }
  return imports;
}

jv block_list_funcs(block body, int omit_underscores) {
  jv funcs = jv_object(); // Use the keys for set semantics.
  for (inst *pos = body.first; pos != NULL; pos = pos->next) {
    if (pos->op == CLOSURE_CREATE || pos->op == CLOSURE_CREATE_C) {
      if (pos->symbol != NULL && (!omit_underscores || pos->symbol[0] != '_')) {
        funcs = jv_object_set(funcs, jv_string_fmt("%s/%i", pos->symbol, pos->nformals), jv_null());
      }
    }
  }
  return jv_keys_unsorted(funcs);
}

block gen_module(block metadata) {
  inst* i = inst_new(MODULEMETA);
  i->imm.constant = block_const(metadata);
  if (jv_get_kind(i->imm.constant) != JV_KIND_OBJECT)
    i->imm.constant = jv_object_set(jv_object(), jv_string("metadata"), i->imm.constant);
  block_free(metadata);
  return inst_block(i);
}

jv block_module_meta(block b) {
  if (b.first != NULL && b.first->op == MODULEMETA)
    return jv_copy(b.first->imm.constant);
  return jv_null();
}

block gen_import(const char* name, const char* as, int is_data) {
  inst* i = inst_new(DEPS);
  jv meta = jv_object();
  if (as != NULL)
    meta = jv_object_set(meta, jv_string("as"), jv_string(as));
  meta = jv_object_set(meta, jv_string("is_data"), is_data ? jv_true() : jv_false());
  meta = jv_object_set(meta, jv_string("relpath"), jv_string(name));
  i->imm.constant = meta;
  return inst_block(i);
}

block gen_import_meta(block import, block metadata) {
  assert(block_is_single(import) && import.first->op == DEPS);
  assert(block_is_const(metadata) && block_const_kind(metadata) == JV_KIND_OBJECT);
  inst *i = import.first;
  i->imm.constant = jv_object_merge(block_const(metadata), i->imm.constant);
  block_free(metadata);
  return import;
}

block gen_function(const char* name, block formals, block body) {
  inst* i = inst_new(CLOSURE_CREATE);
  int nformals = 0;
  for (inst* i = formals.last; i; i = i->prev) {
    nformals++;
    i->nformals = 0;
    if (i->op == CLOSURE_PARAM_REGULAR) {
      i->op = CLOSURE_PARAM;
      body = gen_var_binding(gen_call(i->symbol, gen_noop()), i->symbol, body);
    }
    block_bind_subblock(inst_block(i), body, OP_IS_CALL_PSEUDO | OP_HAS_BINDING, 0);
  }
  i->subfn = body;
  i->symbol = strdup(name);
  i->any_unbound = -1;
  i->nformals = nformals;
  i->arglist = formals;
  block b = inst_block(i);
  block_bind_subblock(b, b, OP_IS_CALL_PSEUDO | OP_HAS_BINDING, 0);
  return b;
}

block gen_param_regular(const char* name) {
  return gen_op_unbound(CLOSURE_PARAM_REGULAR, name);
}

block gen_param(const char* name) {
  return gen_op_unbound(CLOSURE_PARAM, name);
}

block gen_lambda(block body) {
  return gen_function("@lambda", gen_noop(), body);
}

block gen_call(const char* name, block args) {
  block b = gen_op_unbound(CALL_JQ, name);
  b.first->arglist = args;
  b.first->nactuals = block_count_actuals(b.first->arglist);
  return b;
}

block gen_subexp(block a) {
  if (block_is_noop(a)) {
    return gen_op_simple(DUP);
  }
  if (block_is_single(a) && a.first->op == LOADK) {
    jv c = block_const(a);
    block_free(a);
    return gen_op_pushk_under(c);
  }
  return BLOCK(gen_op_simple(SUBEXP_BEGIN), a, gen_op_simple(SUBEXP_END));
}

block gen_both(block a, block b) {
  block jump = gen_op_targetlater(JUMP);
  block fork = gen_op_target(FORK, jump);
  block c = BLOCK(fork, a, jump, b);
  inst_set_target(jump, c);
  return c;
}

block gen_const_object(block expr) {
  int is_const = 1;
  jv o = jv_object();
  jv k = jv_null();
  jv v = jv_null();
  for (inst *i = expr.first; i; i = i->next) {
    if (i->op == PUSHK_UNDER) {
      k = jv_copy(i->imm.constant);
      i = i->next;
    } else if (i->op != SUBEXP_BEGIN ||
        i->next == NULL ||
        i->next->op != LOADK ||
        i->next->next == NULL ||
        i->next->next->op != SUBEXP_END) {
      is_const = 0;
      break;
    } else {
      k = jv_copy(i->next->imm.constant);
      i = i->next->next->next;
    }
    if (i != NULL && i->op == PUSHK_UNDER) {
      v = jv_copy(i->imm.constant);
      i = i->next;
    } else if (i == NULL ||
        i->op != SUBEXP_BEGIN ||
        i->next == NULL ||
        i->next->op != LOADK ||
        i->next->next == NULL ||
        i->next->next->op != SUBEXP_END) {
      is_const = 0;
      break;
    } else {
      v = jv_copy(i->next->imm.constant);
      i = i->next->next->next;
    }
    if (i == NULL || i->op != INSERT) {
      is_const = 0;
      break;
    }
    if (jv_get_kind(k) != JV_KIND_STRING) {
      is_const = 0;
      break;
    }
    o = jv_object_set(o, k, v);
    k = jv_null();
    v = jv_null();
  }
  if (!is_const) {
    jv_free(o);
    jv_free(k);
    jv_free(v);
    block b = {0,0};
    return b;
  }
  block_free(expr);
  return gen_const(o);
}

static block gen_const_array(block expr) {
  /*
   * An expr of all constant elements looks like this:
   *
   * 0009 FORK 0027
   * 0011 FORK 0023
   * 0013 FORK 0019
   * 0015 LOADK 1
   * 0017 JUMP 0021
   * 0019 LOADK 2
   * 0021 JUMP 0025
   * 0023 LOADK 3
   * 0025 JUMP 0029
   * 0027 LOADK 4
   *
   * That's: N-1 commas for N elements, N LOADKs, and a JUMP between
   * every LOADK.  The sequence ends in a LOADK.  Any deviation and it's
   * not a list of constants.
   *
   * Here we check for this pattern almost exactly.  We don't check that
   * the targets of the FORK and JUMP instructions are in the right
   * sequence.
   */
  int all_const = 1;
  int commas = 0;
  int normal = 1;
  jv a = jv_array();
  for (inst *i = expr.first; i; i = i->next) {
    if (i->op == FORK) {
      commas++;
      if (i->imm.target == NULL || i->imm.target->op != JUMP ||
          jv_array_length(jv_copy(a)) > 0) {
        normal = 0;
        break;
      }
    } else if (all_const && i->op == LOADK) {
      if (i->next != NULL && i->next->op != JUMP) {
        normal = 0;
        break;
      }
      a = jv_array_append(a, jv_copy(i->imm.constant));
    } else if (i->op != JUMP || i->imm.target == NULL ||
               i->imm.target->op != LOADK) {
      all_const = 0;
    }
  }

  if (all_const && normal &&
      (expr.last == NULL || expr.last->op == LOADK) &&
      jv_array_length(jv_copy(a)) == commas + 1) {
    block_free(expr);
    return gen_const(a);
  }

  jv_free(a);
  block b = {0,0};
  return b;
}

block gen_collect(block expr) {
  block const_array = gen_const_array(expr);
  if (const_array.first != NULL)
    return const_array;

  block array_var = gen_op_var_fresh(STOREV, "collect");
  block c = BLOCK(gen_op_simple(DUP), gen_const(jv_array()), array_var);

  block tail = BLOCK(gen_op_bound(APPEND, array_var),
                     gen_op_simple(BACKTRACK));

  return BLOCK(c,
               gen_op_target(FORK, tail),
               expr,
               tail,
               gen_op_bound(LOADVN, array_var));
}

static block bind_matcher(block matcher, block body) {
  // cannot call block_bind(matcher, body) because that requires
  // block_has_only_binders(matcher), which is not true here as matchers
  // may also contain code to extract the correct elements
  for (inst* i = matcher.first; i; i = i->next) {
    if ((i->op == STOREV || i->op == STOREVN) && !i->bound_by)
      block_bind_subblock(inst_block(i), body, OP_HAS_VARIABLE, 0);
  }
  return BLOCK(matcher, body);
}


// Extract destructuring var names from the block
// *vars should be a jv_object (for set semantics)
static void block_get_unbound_vars(block b, jv *vars) {
  assert(vars != NULL);
  assert(jv_get_kind(*vars) == JV_KIND_OBJECT);
  for (inst* i = b.first; i; i = i->next) {
    if (i->subfn.first) {
      block_get_unbound_vars(i->subfn, vars);
      continue;
    }
    if ((i->op == STOREV || i->op == STOREVN) && i->bound_by == NULL) {
      *vars = jv_object_set(*vars, jv_string(i->symbol), jv_true());
    }
  }
}

/* Build wrappers around destructuring matchers so that we can chain them
 * when we have errors.  The approach is as follows:
 * DESTRUCTURE_ALT NEXT_MATCHER (unless last matcher)
 * existing_matcher_block
 * JUMP BODY
 */
static block bind_alternation_matchers(block matchers, block body) {
  block preamble = {0};
  block altmatchers = {0};
  block mb = {0};
  block final_matcher = matchers;

  // Pass through the matchers to find all destructured names.
  while (final_matcher.first && final_matcher.first->op == DESTRUCTURE_ALT) {
    block_append(&altmatchers, inst_block(block_take(&final_matcher)));
  }

  // We don't have any alternations here, so we can use the simplest case.
  if (altmatchers.first == NULL) {
    return bind_matcher(final_matcher, body);
  }

  // Collect var names
  jv all_vars = jv_object();
  block_get_unbound_vars(altmatchers, &all_vars);
  block_get_unbound_vars(final_matcher, &all_vars);

  // We need a preamble of STOREVs to which to bind the matchers and the body.
  jv_object_keys_foreach(all_vars, key) {
    preamble = BLOCK(preamble,
                     gen_op_simple(DUP),
                     gen_const(jv_null()),
                     gen_op_unbound(STOREV, jv_string_value(key)));
    jv_free(key);
  }
  jv_free(all_vars);

  // Now we build each matcher in turn
  for (inst *i = altmatchers.first; i; i = i->next) {
    block submatcher = i->subfn;

    // If we're successful, jump to the end of the matchers
    submatcher = BLOCK(submatcher, gen_op_target(JUMP, final_matcher));

    // DESTRUCTURE_ALT to the end of this submatcher so we can skip to the next one on error
    mb = BLOCK(mb, gen_op_target(DESTRUCTURE_ALT, submatcher), submatcher);

    // We're done with this inst and we don't want it anymore
    // But we can't let it free the submatcher block.
    i->subfn.first = i->subfn.last = NULL;
  }
  // We're done with these insts now.
  block_free(altmatchers);

  return bind_matcher(preamble, BLOCK(mb, final_matcher, body));
}

block gen_reduce(block source, block matcher, block init, block body) {
  block res_var = gen_op_var_fresh(STOREV, "reduce");
  block loop = BLOCK(gen_op_simple(DUPN),
                     source,
                     bind_alternation_matchers(matcher,
                                  BLOCK(gen_op_bound(LOADVN, res_var),
                                        body,
                                        gen_op_bound(STOREV, res_var))),
                     gen_op_simple(BACKTRACK));
  return BLOCK(gen_op_simple(DUP),
               init,
               res_var,
               gen_op_target(FORK, loop),
               loop,
               gen_op_bound(LOADVN, res_var));
}

block gen_foreach(block source, block matcher, block init, block update, block extract) {
  block output = gen_op_targetlater(JUMP);
  block state_var = gen_op_var_fresh(STOREV, "foreach");
  block loop = BLOCK(gen_op_simple(DUPN),
                     // get a value from the source expression:
                     source,
                     // destructure the value into variable(s) for all the code
                     // in the body to see
                     bind_alternation_matchers(matcher,
                                  // load the loop state variable
                                  BLOCK(gen_op_bound(LOADVN, state_var),
                                        // generate updated state
                                        update,
                                        // save the updated state for value extraction
                                        gen_op_simple(DUP),
                                        // save new state
                                        gen_op_bound(STOREV, state_var),
                                        // extract an output...
                                        extract,
                                        // ...and output it by jumping
                                        // past the BACKTRACK that comes
                                        // right after the loop body,
                                        // which in turn is there
                                        // because...
                                        //
                                        // (Incidentally, extract can also
                                        // backtrack, e.g., if it calls
                                        // empty, in which case we don't
                                        // get here.)
                                        output)));
  block foreach = BLOCK(gen_op_simple(DUP),
                        init,
                        state_var,
                        gen_op_target(FORK, loop),
                        loop,
                        // ...at this point `foreach`'s original input
                        // will be on top of the stack, and we don't
                        // want to output it, so we backtrack.
                        gen_op_simple(BACKTRACK));
  inst_set_target(output, foreach); // make that JUMP go bast the BACKTRACK at the end of the loop
  return foreach;
}

block gen_definedor(block a, block b) {
  // var found := false
  block found_var = gen_op_var_fresh(STOREV, "found");
  block init = BLOCK(gen_op_simple(DUP), gen_const(jv_false()), found_var);

  // if found, backtrack. Otherwise execute b
  block backtrack = gen_op_simple(BACKTRACK);
  block tail = BLOCK(gen_op_simple(DUP),
                     gen_op_bound(LOADV, found_var),
                     gen_op_target(JUMP_F, backtrack),
                     backtrack,
                     gen_op_simple(POP),
                     b);

  // try again
  block if_notfound = gen_op_simple(BACKTRACK);

  // found := true, produce result
  block if_found = BLOCK(gen_op_simple(DUP),
                         gen_const(jv_true()),
                         gen_op_bound(STOREV, found_var),
                         gen_op_target(JUMP, tail));

  return BLOCK(init,
               gen_op_target(FORK, if_notfound),
               a,
               gen_op_target(JUMP_F, if_found),
               if_found,
               if_notfound,
               tail);
}

int block_has_main(block top) {
  for (inst *c = top.first; c; c = c->next) {
    if (c->op == TOP)
      return 1;
  }
  return 0;
}

int block_is_funcdef(block b) {
  if (b.first != NULL && b.first->op == CLOSURE_CREATE)
    return 1;
  return 0;
}

block gen_condbranch(block iftrue, block iffalse) {
  iftrue = BLOCK(iftrue, gen_op_target(JUMP, iffalse));
  return BLOCK(gen_op_target(JUMP_F, iftrue), iftrue, iffalse);
}

block gen_and(block a, block b) {
  // a and b = if a then (if b then true else false) else false
  return BLOCK(gen_op_simple(DUP), a,
               gen_condbranch(BLOCK(gen_op_simple(POP),
                                    b,
                                    gen_condbranch(gen_const(jv_true()),
                                                   gen_const(jv_false()))),
                              BLOCK(gen_op_simple(POP), gen_const(jv_false()))));
}

block gen_or(block a, block b) {
  // a or b = if a then true else (if b then true else false)
  return BLOCK(gen_op_simple(DUP), a,
               gen_condbranch(BLOCK(gen_op_simple(POP), gen_const(jv_true())),
                              BLOCK(gen_op_simple(POP),
                                    b,
                                    gen_condbranch(gen_const(jv_true()),
                                                   gen_const(jv_false())))));
}

block gen_destructure_alt(block matcher) {
  for (inst *i = matcher.first; i; i = i->next) {
    if (i->op == STOREV) {
      i->op = STOREVN;
    }
  }
  inst* i = inst_new(DESTRUCTURE_ALT);
  i->subfn = matcher;
  return inst_block(i);
}

block gen_var_binding(block var, const char* name, block body) {
  return gen_destructure(var, gen_op_unbound(STOREV, name), body);
}

block gen_array_matcher(block left, block curr) {
  int index;
  if (block_is_noop(left))
    index = 0;
  else {
    // `left` was returned by this function, so the third inst is the
    // constant containing the previously used index
    assert(left.first->op == DUP);
    assert(left.first->next != NULL);
    inst *i = NULL;
    if (left.first->next->op == PUSHK_UNDER) {
      i = left.first->next;
    } else {
      assert(left.first->next->op == SUBEXP_BEGIN);
      assert(left.first->next->next->op == LOADK);
      i = left.first->next->next;
    }
    index = 1 + (int) jv_number_value(i->imm.constant);
  }

  // `left` goes at the end so that the const index is in a predictable place
  return BLOCK(gen_op_simple(DUP), gen_subexp(gen_const(jv_number(index))),
               gen_op_simple(INDEX), curr, left);
}

block gen_object_matcher(block name, block curr) {
  return BLOCK(gen_op_simple(DUP), gen_subexp(name), gen_op_simple(INDEX),
               curr);
}

block gen_destructure(block var, block matchers, block body) {
  // var bindings can be added after coding the program; leave the TOP first.
  block top = gen_noop();
  if (body.first && body.first->op == TOP)
    top = inst_block(block_take(&body));

  if (matchers.first && matchers.first->op == DESTRUCTURE_ALT) {
    block_append(&var, gen_op_simple(DUP));
  } else {
    top = BLOCK(top, gen_op_simple(DUP));
  }

  return BLOCK(top, gen_subexp(var), gen_op_simple(POP), bind_alternation_matchers(matchers, body));
}

// Like gen_var_binding(), but bind `break`'s wildcard unbound variable
static block gen_wildvar_binding(block var, const char* name, block body) {
  return BLOCK(gen_op_simple(DUP), var,
               block_bind(gen_op_unbound(STOREV, name),
                          body, OP_HAS_VARIABLE | OP_BIND_WILDCARD));
}

block gen_cond(block cond, block iftrue, block iffalse) {
  return BLOCK(gen_op_simple(DUP), BLOCK(gen_subexp(cond), gen_op_simple(POP)),
               gen_condbranch(BLOCK(gen_op_simple(POP), iftrue),
                              BLOCK(gen_op_simple(POP), iffalse)));
}

block gen_try_handler(block handler) {
  // Quite a pain just to hide jq's internal errors.
  return gen_cond(// `if type=="object" and .__jq
                  gen_and(gen_call("_equal",
                                   BLOCK(gen_lambda(gen_const(jv_string("object"))),
                                         gen_lambda(gen_call("type", gen_noop())))),
                          BLOCK(gen_subexp(gen_const(jv_string("__jq"))),
                                gen_noop(),
                                gen_op_simple(INDEX))),
                  // `then error`
                  gen_call("error", gen_noop()),
                  // `else HANDLER end`
                  handler);
}

block gen_try(block exp, block handler) {
  /*
   * Produce something like:
   *  FORK_OPT <address of handler>
   *  <exp>
   *  JUMP <end of handler>
   *  <handler>
   *
   * If this is not an internal try/catch, then catch and re-raise
   * internal errors to prevent them from leaking.
   *
   * The handler will only execute if we backtrack to the FORK_OPT with
   * an error (exception).  If <exp> produces no value then FORK_OPT
   * will backtrack (propagate the `empty`, as it were.  If <exp>
   * produces a value then we'll execute whatever bytecode follows this
   * sequence.
   */
  if (!handler.first && !handler.last)
    // A hack to deal with `.` as the handler; we could use a real NOOP here
    handler = BLOCK(gen_op_simple(DUP), gen_op_simple(POP), handler);
  exp = BLOCK(exp, gen_op_target(JUMP, handler));
  return BLOCK(gen_op_target(FORK_OPT, exp), exp, handler);
}

block gen_label(const char *label, block exp) {
  block cond = gen_call("_equal",
                        BLOCK(gen_lambda(gen_noop()),
                              gen_lambda(gen_op_unbound(LOADV, label))));
  return gen_wildvar_binding(gen_op_simple(GENLABEL), label,
                             BLOCK(gen_op_simple(POP),
                                   // try exp catch if . == $label
                                   //               then empty
                                   //               else error end
                                   //
                                   // Can't use gen_binop(), as that's firmly
                                   // stuck in parser.y as it refers to things
                                   // like EQ.
                                   gen_try(exp,
                                           gen_cond(cond,
                                                    gen_op_simple(BACKTRACK),
                                                    gen_call("error", gen_noop())))));
}

block gen_cbinding(const struct cfunction* cfunctions, int ncfunctions, block code) {
  for (int cfunc=0; cfunc<ncfunctions; cfunc++) {
    inst* i = inst_new(CLOSURE_CREATE_C);
    i->imm.cfunc = &cfunctions[cfunc];
    i->symbol = strdup(cfunctions[cfunc].name);
    i->nformals = cfunctions[cfunc].nargs - 1;
    i->any_unbound = 0;
    code = BLOCK(inst_block(i), code);
  }
  return code;
}

static uint16_t nesting_level(struct bytecode* bc, inst* target) {
  uint16_t level = 0;
  assert(bc && target && target->compiled);
  while (bc && target->compiled != bc) {
    level++;
    bc = bc->parent;
  }
  assert(bc && bc == target->compiled);
  return level;
}

static int count_cfunctions(block b) {
  int n = 0;
  for (inst* i = b.first; i; i = i->next) {
    if (i->op == CLOSURE_CREATE_C) n++;
    n += count_cfunctions(i->subfn);
  }
  return n;
}

#ifndef WIN32
extern char **environ;
#endif

static jv
make_env(jv env)
{
  if (jv_is_valid(env))
    return jv_copy(env);
  jv r = jv_object();
  if (environ == NULL)
    return r;
  for (size_t i = 0; environ[i] != NULL; i++) {
    const char *eq;

    if ((eq = strchr(environ[i], '=')) == NULL)
      r = jv_object_delete(r, jv_string(environ[i]));
    else
      r = jv_object_set(r, jv_string_sized(environ[i], eq - environ[i]), jv_string(eq + 1));
  }
  return jv_copy(r);
}

// Expands call instructions into a calling sequence
static int expand_call_arglist(block* b, jv args, jv *env) {
  int errors = 0;
  block ret = gen_noop();
  for (inst* curr; (curr = block_take(b));) {
    if (opcode_describe(curr->op)->flags & OP_HAS_BINDING) {
      if (!curr->bound_by && curr->op == LOADV && strcmp(curr->symbol, "ENV") == 0) {
        curr->op = LOADK;
        *env = curr->imm.constant = make_env(*env);
      } else if (!curr->bound_by && curr->op == LOADV && jv_object_has(jv_copy(args), jv_string(curr->symbol))) {
        curr->op = LOADK;
        curr->imm.constant = jv_object_get(jv_copy(args), jv_string(curr->symbol));
      } else if (!curr->bound_by) {
        if (curr->symbol[0] == '*' && curr->symbol[1] >= '1' && curr->symbol[1] <= '3' && curr->symbol[2] == '\0')
          locfile_locate(curr->locfile, curr->source, "jq: error: break used outside labeled control structure");
        else if (curr->op == LOADV)
          locfile_locate(curr->locfile, curr->source, "jq: error: $%s is not defined", curr->symbol);
        else
          locfile_locate(curr->locfile, curr->source, "jq: error: %s/%d is not defined", curr->symbol, curr->nactuals);
        errors++;
        // don't process this instruction if it's not well-defined
        ret = BLOCK(ret, inst_block(curr));
        continue;
      }
    }

    block prelude = gen_noop();
    if (curr->op == CALL_JQ) {
      int actual_args = 0, desired_args = 0;
      // We expand the argument list as a series of instructions
      switch (curr->bound_by->op) {
      default: assert(0 && "Unknown function type"); break;
      case CLOSURE_CREATE:
      case CLOSURE_PARAM: {
        block callargs = gen_noop();
        for (inst* i; (i = block_take(&curr->arglist));) {
          assert(opcode_describe(i->op)->flags & OP_IS_CALL_PSEUDO);
          block b = inst_block(i);
          switch (i->op) {
          default: assert(0 && "Unknown type of parameter"); break;
          case CLOSURE_REF:
            block_append(&callargs, b);
            break;
          case CLOSURE_CREATE:
            block_append(&prelude, b);
            block_append(&callargs, gen_op_bound(CLOSURE_REF, b));
            break;
          }
          actual_args++;
        }
        curr->imm.intval = actual_args;
        curr->arglist = callargs;

        if (curr->bound_by->op == CLOSURE_CREATE) {
          for (inst* i = curr->bound_by->arglist.first; i; i = i->next) {
            assert(i->op == CLOSURE_PARAM);
            desired_args++;
          }
        }
        break;
      }

      case CLOSURE_CREATE_C: {
        for (inst* i; (i = block_take(&curr->arglist)); ) {
          assert(i->op == CLOSURE_CREATE); // FIXME
          block body = i->subfn;
          i->subfn = gen_noop();
          inst_free(i);
          // arguments should be pushed in reverse order, prepend them to prelude
          errors += expand_call_arglist(&body, args, env);
          prelude = BLOCK(gen_subexp(body), prelude);
          actual_args++;
        }
        assert(curr->op == CALL_JQ);
        curr->op = CALL_BUILTIN;
        curr->imm.intval = actual_args + 1 /* include the implicit input in arg count */;
        assert(curr->bound_by->op == CLOSURE_CREATE_C);
        desired_args = curr->bound_by->imm.cfunc->nargs - 1;
        assert(!curr->arglist.first);
        break;
      }
      }

      assert(actual_args == desired_args); // because now handle this above
    }
    ret = BLOCK(ret, prelude, inst_block(curr));
  }
  *b = ret;
  return errors;
}

static int compile(struct bytecode* bc, block b, struct locfile* lf, jv args, jv *env) {
  int errors = 0;
  int pos = 0;
  int var_frame_idx = 0;
  bc->nsubfunctions = 0;
  errors += expand_call_arglist(&b, args, env);
  b = BLOCK(b, gen_op_simple(RET));
  jv localnames = jv_array();
  for (inst* curr = b.first; curr; curr = curr->next) {
    if (!curr->next) assert(curr == b.last);
    int length = opcode_describe(curr->op)->length;
    if (curr->op == CALL_JQ) {
      for (inst* arg = curr->arglist.first; arg; arg = arg->next) {
        length += 2;
      }
    }
    pos += length;
    curr->bytecode_pos = pos;
    curr->compiled = bc;

    assert(curr->op != CLOSURE_REF && curr->op != CLOSURE_PARAM);

    if ((opcode_describe(curr->op)->flags & OP_HAS_VARIABLE) &&
        curr->bound_by == curr) {
      curr->imm.intval = var_frame_idx++;
      localnames = jv_array_append(localnames, jv_string(curr->symbol));
    }

    if (curr->op == CLOSURE_CREATE) {
      assert(curr->bound_by == curr);
      curr->imm.intval = bc->nsubfunctions++;
    }
    if (curr->op == CLOSURE_CREATE_C) {
      assert(curr->bound_by == curr);
      int idx = bc->globals->ncfunctions++;
      bc->globals->cfunc_names = jv_array_append(bc->globals->cfunc_names,
                                                 jv_string(curr->symbol));
      bc->globals->cfunctions[idx] = *curr->imm.cfunc;
      curr->imm.intval = idx;
    }
  }
  if (pos > 0xFFFF) {
    // too long for program counter to fit in uint16_t
    locfile_locate(lf, UNKNOWN_LOCATION,
        "function compiled to %d bytes which is too long", pos);
    errors++;
  }
  bc->codelen = pos;
  bc->debuginfo = jv_object_set(bc->debuginfo, jv_string("locals"), localnames);
  if (bc->nsubfunctions) {
    bc->subfunctions = jv_mem_calloc(sizeof(struct bytecode*), bc->nsubfunctions);
    for (inst* curr = b.first; curr; curr = curr->next) {
      if (curr->op == CLOSURE_CREATE) {
        struct bytecode* subfn = jv_mem_alloc(sizeof(struct bytecode));
        bc->subfunctions[curr->imm.intval] = subfn;
        subfn->globals = bc->globals;
        subfn->parent = bc;
        subfn->nclosures = 0;
        subfn->debuginfo = jv_object_set(jv_object(), jv_string("name"), jv_string(curr->symbol));
        jv params = jv_array();
        for (inst* param = curr->arglist.first; param; param = param->next) {
          assert(param->op == CLOSURE_PARAM);
          assert(param->bound_by == param);
          param->imm.intval = subfn->nclosures++;
          param->compiled = subfn;
          params = jv_array_append(params, jv_string(param->symbol));
        }
        subfn->debuginfo = jv_object_set(subfn->debuginfo, jv_string("params"), params);
        errors += compile(subfn, curr->subfn, lf, args, env);
        curr->subfn = gen_noop();
      }
    }
  } else {
    bc->subfunctions = 0;
  }
  uint16_t* code = jv_mem_calloc(sizeof(uint16_t), bc->codelen);
  bc->code = code;
  pos = 0;
  jv constant_pool = jv_array();
  int maxvar = -1;
  if (!errors) for (inst* curr = b.first; curr; curr = curr->next) {
    const struct opcode_description* op = opcode_describe(curr->op);
    if (op->length == 0)
      continue;
    code[pos++] = curr->op;
    assert(curr->op != CLOSURE_REF && curr->op != CLOSURE_PARAM);
    if (curr->op == CALL_BUILTIN) {
      assert(curr->bound_by->op == CLOSURE_CREATE_C);
      assert(!curr->arglist.first);
      code[pos++] = (uint16_t)curr->imm.intval;
      code[pos++] = curr->bound_by->imm.intval;
    } else if (curr->op == CALL_JQ) {
      assert(curr->bound_by->op == CLOSURE_CREATE ||
             curr->bound_by->op == CLOSURE_PARAM);
      code[pos++] = (uint16_t)curr->imm.intval;
      code[pos++] = nesting_level(bc, curr->bound_by);
      code[pos++] = curr->bound_by->imm.intval |
        (curr->bound_by->op == CLOSURE_CREATE ? ARG_NEWCLOSURE : 0);
      for (inst* arg = curr->arglist.first; arg; arg = arg->next) {
        assert(arg->op == CLOSURE_REF && arg->bound_by->op == CLOSURE_CREATE);
        code[pos++] = nesting_level(bc, arg->bound_by);
        code[pos++] = arg->bound_by->imm.intval | ARG_NEWCLOSURE;
      }
    } else if ((op->flags & OP_HAS_CONSTANT) && (op->flags & OP_HAS_VARIABLE)) {
      // STORE_GLOBAL: constant global, basically
      code[pos++] = jv_array_length(jv_copy(constant_pool));
      constant_pool = jv_array_append(constant_pool, jv_copy(curr->imm.constant));
      code[pos++] = nesting_level(bc, curr->bound_by);
      uint16_t var = (uint16_t)curr->bound_by->imm.intval;
      code[pos++] = var;
    } else if (op->flags & OP_HAS_CONSTANT) {
      code[pos++] = jv_array_length(jv_copy(constant_pool));
      constant_pool = jv_array_append(constant_pool, jv_copy(curr->imm.constant));
    } else if (op->flags & OP_HAS_VARIABLE) {
      code[pos++] = nesting_level(bc, curr->bound_by);
      uint16_t var = (uint16_t)curr->bound_by->imm.intval;
      code[pos++] = var;
      if (var > maxvar) maxvar = var;
    } else if (op->flags & OP_HAS_BRANCH) {
      assert(curr->imm.target->bytecode_pos != -1);
      assert(curr->imm.target->bytecode_pos > pos); // only forward branches
      code[pos] = curr->imm.target->bytecode_pos - (pos + 1);
      pos++;
    } else if (op->length > 1) {
      assert(0 && "codegen not implemented for this operation");
    }
  }
  bc->constants = constant_pool;
  bc->nlocals = maxvar + 2; // FIXME: frames of size zero?
  block_free(b);
  return errors;
}

int block_compile(block b, struct bytecode** out, struct locfile* lf, jv args) {
  struct bytecode* bc = jv_mem_alloc(sizeof(struct bytecode));
  bc->parent = 0;
  bc->nclosures = 0;
  bc->globals = jv_mem_alloc(sizeof(struct symbol_table));
  int ncfunc = count_cfunctions(b);
  bc->globals->ncfunctions = 0;
  bc->globals->cfunctions = jv_mem_calloc(sizeof(struct cfunction), ncfunc);
  bc->globals->cfunc_names = jv_array();
  bc->debuginfo = jv_object_set(jv_object(), jv_string("name"), jv_null());
  jv env = jv_invalid();
  int nerrors = compile(bc, b, lf, args, &env);
  jv_free(args);
  jv_free(env);
  assert(bc->globals->ncfunctions == ncfunc);
  if (nerrors > 0) {
    bytecode_free(bc);
    *out = 0;
  } else {
    *out = bc;
  }
  return nerrors;
}

void block_free(block b) {
  struct inst* next;
  for (struct inst* curr = b.first; curr; curr = next) {
    next = curr->next;
    inst_free(curr);
  }
}
